#### 101 machine learning ###########################

# NOTE(review): '父级路径' ("parent path") looks like a placeholder for the
# project directory - confirm and replace it before running. setwd() raises an
# error when the directory cannot be entered, and every relative file path
# used below is resolved against this working directory.
setwd('父级路径')
library(snowfall)
library(devtools)
library(CoxBoost)
library(fastAdaboost)
library(Mime1)
library(IOBR)
library(dplyr)
library(immunedeconv)
library(xgboost)
library(UpSetR)
library(Cairo)
library(ggplot2)


# Input data to prepare.
## Several cohorts can be combined for the analysis.
# Cohort 1: CGGA_OS.txt and CGGA_FPKM.txt

# Survival table: label the first two columns OS / OS.time, drop the third.
surv <- read.table(file = 'CGGA_OS.txt', sep = '\t', header = TRUE)
colnames(surv)[1:2] <- c("OS", "OS.time")
surv <- surv[, -3]

# The expression table is read without a header, so its first column and
# first row are promoted to row / column names and then removed from the data.
rt1 <- read.table("CGGA_FPKM.txt", sep = "\t")
rownames(rt1) <- rt1[, 1]
colnames(rt1) <- rt1[1, ]
rt1 <- rt1[-1, -1]
expr <- rt1

# Keep the identifiers found both in the expression column names and in the
# survival row names, and subset both tables to that shared set.
# NOTE(review): assumes the survival row names are sample IDs - confirm
# against the layout of CGGA_OS.txt.
comgene <- intersect(colnames(expr), rownames(surv))
table(substr(comgene, 14, 16))
expr <- expr[, comgene]
surv <- surv[comgene, ]

# Transpose the expression table, put the survival columns in front of it,
# then prepend the row names as an "ID" column.
deg_expr <- as.data.frame(t(expr))
surv.expr <- cbind(surv, deg_expr)
surv.expr <- cbind(ID = row.names(surv.expr), surv.expr)
Dataset2 <- surv.expr
# Dataset2 assembled

# Cohort 2: TCGA GBM and LGG survival tables, stacked and then sorted in
# ascending order of their fourth column.
surv <- read.table(file = 'TCGA-GBM.survival.tsv', sep = '\t', header = TRUE)
surv2 <- read.table(file = 'TCGA-LGG.survival.tsv', sep = '\t', header = TRUE)
merged_df <- rbind(surv, surv2)
sorted_df <- merged_df[order(merged_df[[4]]), ]
surv <- sorted_df

# Pre-processing that avoids an error on the server (Windows is unaffected):
# keep only the first row for each sample ID. There may be a better solution.
surv <- surv[!duplicated(surv$sample), ]

# Tidy the survival table: sample IDs use "." instead of "-" and become the
# row names; then the first column and the second remaining column are dropped.
surv$sample <- gsub("-", ".", surv$sample)
row.names(surv) <- surv$sample
surv <- surv[, -1]
surv <- surv[, -2]

# Expression table: truncate column names to 16 characters, then replace
# "-" with "." so they can be compared with the survival row names.
rt <- read.table("GLIOMA_FPKM_MRNA2.txt", sep = "\t")
expr <- rt
colnames(expr) <- gsub("-", ".", substr(colnames(expr), 1, 16))

# Subset both tables to the identifiers they share.
comgene <- intersect(colnames(expr), rownames(surv))
table(substr(comgene, 14, 16))
expr <- expr[, comgene]
surv <- surv[comgene, ]

# Transpose the expression table, put the survival columns in front of it,
# then prepend the row names as an "ID" column.
deg_expr <- as.data.frame(t(expr))
surv.expr <- cbind(surv, deg_expr)
surv.expr <- cbind(ID = row.names(surv.expr), surv.expr)
Dataset1 <- surv.expr
# Second cohort assembled
# Dataset1 and Dataset2 are expected to be data frames.
# For each one, swap the positions of columns 2 and 3; when the object is
# missing or is not a data frame, print a notice instead of reordering.
if (!exists("Dataset1") || !is.data.frame(Dataset1)) {
  print("Dataset1不存在或不是一个数据框。请检查数据。")
} else {
  Dataset1 <- Dataset1[, c(1, 3, 2, 4:ncol(Dataset1))]
}

if (!exists("Dataset2") || !is.data.frame(Dataset2)) {
  print("Dataset2不存在或不是一个数据框。请检查数据。")
} else {
  Dataset2 <- Dataset2[, c(1, 3, 2, 4:ncol(Dataset2))]
}

# Named list holding both cohorts.
list_train_vali_Data <- list(
  Dataset1 = Dataset1,
  Dataset2 = Dataset2
)

# Candidate genes: first column (V1) of the header-less, tab-separated
# list.txt.
# NOTE(review): the variable name `list` shadows base::list; it is kept
# unchanged because code outside this section may still refer to it.
list <- read.table("list.txt", header = FALSE, sep = "\t", quote = "",
                   check.names = FALSE)
genelist <- list$V1

# Load a previously saved result when the cache file is present. A cache
# that cannot be read is reported as a warning and then treated as absent.
if (file.exists('res.rdata')) {
  tryCatch({
    load('res.rdata')
  }, error = function(e) {
    warning("Could not load 'res.rdata': ", conditionMessage(e),
            call. = FALSE)
  })
}

if (exists("res")) {
  # NOTE(review): a cached `res` is reused as-is; delete res.rdata after
  # changing the input data or the gene list.
  message("res 存在 引用算好的")
} else {
  message("res 不存在 执行计算")
  res <- ML.Dev.Prog.Sig(train_data = list_train_vali_Data$Dataset1,
                         list_train_vali_Data = list_train_vali_Data,
                         unicox.filter.for.candi = TRUE,
                         unicox_p_cutoff = 0.05,
                         candidate_genes = genelist,
                         mode = 'all', nodesize = 5, seed = 5201314)
  # Save right after computing. The previous guard, `if (!exists("res"))`,
  # was evaluated after `res` had been assigned, so the cache was never
  # written and the computation was repeated on every run.
  message("保存res数据")
  save(res, file = 'res.rdata')
}

# res <- ML.Dev.Prog.Sig(train_data = list_train_vali_Data$Dataset1,
#                        list_train_vali_Data = list_train_vali_Data,
#                        unicox.filter.for.candi = T,
#                        unicox_p_cutoff = 0.05,
#                        candidate_genes = genelist,
#                        mode = 'all',
#                        nodesize =5,
#                        seed = 123)
# C-index overview plot built from `res`.
custom_colors <- c("#4195C1", "#ecf0f1", "#CB5746", "#3fa0c0", "#d5d9e5")
hm <- cindex_dis_all(
  res,
  color = custom_colors,       # custom colour set
  validate_set = "Dataset1",   # name of the validation set
  #order = names(list_train_vali_Data),
  width = 0.35
)

# The PDF height is derived from the number of rows in the C-index table.
cellwidth <- 1
cellheight <- 0.5
Cindex.res <- res[["Cindex.res"]]
pdf(file.path("heatmap1.pdf"),
    width = cellwidth * 4 + 5,
    height = cellheight * nrow(Cindex.res) * 0.1)
print(hm)
dev.off()

# CairoTIFF cannot be used here: it works on Windows but not on Linux, so
# only PNG output is produced for now.
#CairoTIFF(file="heatmap1.tiff", width=cellwidth * 4 + 5, height=cellheight * nrow(Cindex.res) * 0.1,units="in",dpi=120)
png(filename = 'heatmap1.png', width = 800, height = 800)
# Draw explicitly: a bare `hm` is only auto-printed at the interactive top
# level, so under source() the PNG would be left empty.
print(hm)
dev.off()

# Export the risk scores of the "StepCox[forward] + GBM" model for both
# cohorts.
# NOTE(review): "TCGAriskcore.txt" looks like a typo for "TCGAriskscore.txt";
# the file name is kept unchanged in case other scripts read it.
riskscore <- res[["riskscore"]]
riskscore_1 <- riskscore[["StepCox[forward] + GBM"]]
riskscore_1_TCGA <- riskscore_1[["Dataset1"]]
riskscore_1_CGGA <- riskscore_1[["Dataset2"]]
write.table(riskscore_1_TCGA, file = "TCGAriskcore.txt", sep = "\t",
            row.names = TRUE, col.names = NA, quote = FALSE)
write.table(riskscore_1_CGGA, file = "CGGAriskscore.txt", sep = "\t",
            row.names = TRUE, col.names = NA, quote = FALSE)

# Export the risk scores of every model, one pair of files per model.
# Errors are handled per model, so one failing export (for example a model
# name that is not a valid file name on this system) is reported and the
# remaining models are still written. Previously a single try() around the
# whole loop stopped at the first failure.
# NOTE(review): as before, riskscore_1 / riskscore_1_TCGA / riskscore_1_CGGA
# hold the values of the last model once the loop has finished.
allRiskName <- names(riskscore)
for (variable in allRiskName) {
  tryCatch({
    riskscore_1 <- riskscore[[variable]]
    riskscore_1_TCGA <- riskscore_1[["Dataset1"]]
    riskscore_1_CGGA <- riskscore_1[["Dataset2"]]
    # Convert the file names to UTF-8; this works around an encoding problem
    # that interrupted the file output.
    TCGArisk <- enc2utf8(paste0('TCGArisk', variable, ".txt"))
    CGGArisk <- enc2utf8(paste0('CGGArisk', variable, ".txt"))
    write.table(riskscore_1_TCGA, file = TCGArisk, sep = "\t",
                row.names = TRUE, col.names = NA, quote = FALSE)
    write.table(riskscore_1_CGGA, file = CGGArisk, sep = "\t",
                row.names = TRUE, col.names = NA, quote = FALSE)
  }, error = function(e) {
    warning("Risk score export failed for model '", variable, "': ",
            conditionMessage(e), call. = FALSE)
  })
}


# Load a previously saved core-feature table when the cache file is present.
# A cache that cannot be read is reported as a warning and treated as absent.
if (file.exists('res.feature.all.txt')) {
  tryCatch({
    res.feature.all <- read.table('res.feature.all.txt', sep = "\t",
                                  header = TRUE, check.names = FALSE)
  }, error = function(e) {
    warning("Could not read 'res.feature.all.txt': ", conditionMessage(e),
            call. = FALSE)
  })
}

if (exists("res.feature.all")) {
  message("res.feature.all 存在")
} else {
  message("res.feature.all 不存在 执行计算")
  res.feature.all <- ML.Corefeature.Prog.Screen(
    InputMatrix = list_train_vali_Data$Dataset1,
    candidate_genes = genelist,
    mode = "all", nodesize = 5, seed = 5201314
  )
  # Write the cache right after computing. The previous guard,
  # `if (!exists("res.feature.all"))`, was evaluated after the assignment,
  # so the cache file was never created.
  message("不存在 res.feature.all.txt")
  write.table(res.feature.all, "res.feature.all.txt", col.names = TRUE,
              row.names = FALSE, sep = "\t", quote = FALSE)
}

# Always export the table under its report file name as well.
write.table(res.feature.all, "所有算法筛选对象.txt", col.names = TRUE,
            row.names = FALSE, sep = "\t", quote = FALSE)



# core_feature_select(res.feature.all) ## produced an empty result
## so the UpSet plot for that step is built manually below.

# Colours and the main-bar / matrix height ratio.
col <- c("#E18727", "#B09C85", "#ADB6B6", "#B09C85")
mb.ratio <- c(0.6, 0.4)

# One entry per method, holding that method's selected features, in order of
# first appearance in the table.
core_feature_list <- list()
for (i in unique(res.feature.all$method)) {
  core_feature_list[[i]] <-
    res.feature.all[res.feature.all$method == i, "selected.fea"]
}

# Build the UpSet plot.
p1 <- upset(
  fromList(core_feature_list),
  sets = names(core_feature_list),
  order.by = "freq",
  nintersects = NA,
  mb.ratio = mb.ratio,
  keep.order = TRUE,
  mainbar.y.label = "Shared gene number",
  sets.x.label = "Total gene number",
  point.size = 2,
  line.size = 1,
  sets.bar.color = col[1],
  main.bar.color = col[2],
  matrix.color = col[3],
  shade.color = col[4]
)

# Write the plot to PDF.
pdf("core_feature_upset_plot.pdf", width = 10, height = 6)
print(p1)
dev.off()

# TIFF output via Cairo is disabled:
# CairoTIFF(file="core_feature_upset_plot.tiff", width=150, height=150,units="in",dpi=120)
# CairoTIFF(file="core_feature_upset_plot.tiff", width=cellwidth * 4 + 5, height=cellheight * nrow(Cindex.res) * 0.1,units="in",dpi=120)
# p1
# dev.off()

png(filename = 'core_feature_upset_plot.png', width = 800, height = 800)
# Draw explicitly: a bare `p1` is only auto-printed at the interactive top
# level, so under source() the PNG would be left empty.
print(p1)
dev.off()

# tryCatch({
#
# },finally = {
#   dev.off()
# })


